	* Set the working directory. Every data\... path used below is relative to it.
	* NOTE(review): this is a machine-specific absolute path; adjust before running elsewhere.
	cd D:\Dropbox\book_welfare\replication\



*****************************************************
 * Create best-seller data from various sources
 ****************************************************
 * New York Times, from GitHub (tidytuesday)
 * source: https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_full.tsv 
 
* Build one row per year for the New York Times list and store it in tempfile nyt:
*   nyt_m     = sum of mshare over list entries
*   nyt_name  = number of entries with a non-missing mshare
*   nyt_total = number of entries
use data\nyt_github.dta, clear

	* The first space-delimited token of author, upper-cased, is the merge key.
	split author, parse(" ")
	gen name = upper(author1)

	* keep(master match) discards lookup names that never appear on the list, so
	* they cannot end up as a missing-year row after the collapse (the other two
	* sections drop them as well). nogenerate prevents a leftover _merge variable,
	* which would make the second merge below stop with an "already defined" error.
	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate

	* Interactive diagnostic only: authors without a name match, most frequent
	* first. The data are restored afterwards, so this has no effect on the output.
	preserve
		keep if mshare==.
		keep author
		gen x = 1
		collapse (sum) n=x, by(author)
		gsort -n
	restore

	* Load the hand-coded genders (sheet "nyt") and turn male (0/1) into a female
	* share; any other value of male leaves xfemshare missing.
	preserve
		import excel data\hand_match_name_gender.xlsx, sheet("nyt") firstrow clear
		keep author male
		gen xfemshare = 1 - male if inlist(male, 0, 1)
		keep author xfemshare
		tempfile fem
		save `fem'
	restore

	merge m:1 author using `fem', keep(master match) nogenerate

	* Apply the hand match. The collapse below aggregates mshare, so mshare has to
	* be updated too; otherwise the hand-coded authors never reach nyt_m/nyt_name.
	* NOTE(review): assumes mshare is the complement of femshare on a 0-1 scale -
	* confirm against name_gender_wipo.dta.
	replace femshare = xfemshare if xfemshare~=.
	replace mshare = 1 - xfemshare if xfemshare~=.
	drop xfemshare

	gen x=1
	collapse (sum) nyt_m = mshare (count) nyt_name=mshare nyt_total=x, by(year)

	tempfile nyt
	save `nyt'
	
*******************************************************	
* Publishers Weekly 
	
* Build one row per year for the Publishers Weekly list and store it in tempfile pw:
*   pw_m     = sum of mshare over list entries
*   pw_name  = number of entries with a non-missing mshare
*   pw_total = number of entries
use data\pw_bestsellers_1895_2016.dta, clear

	* Merge key: first space-delimited token of author1, upper-cased.
	split author1, parse(" ")
	generate name = upper(author11)

	* Keep list entries only (matched or not); lookup-only names are discarded.
	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate

	generate one = 1
	collapse (sum) pw_m = mshare (count) pw_name = mshare pw_total = one, by(year)

	tempfile pw
	save `pw'

	
	
*****************************************	
* USA Today 

* Build one row per year for the USA Today list (usat_m, usat_name, usat_total),
* then add the NYT and Publishers Weekly yearly rows and save the combined file.
use data\usat_1997_2020.dta, clear

	* Split the author field on ", " into author1, author2, ...
	split author, parse(", ")
	* Remove variables named author plus exactly two characters - presumably so
	* the next split can create author11, author12, ... without a name clash.
	drop author??

	* Merge key: first space-delimited token of author1, upper-cased.
	split author1, parse(" ")
	generate name = upper(author11)

	* Keep list entries only (matched or not); lookup-only names are discarded.
	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate

	generate one = 1
	collapse (sum) usat_m = mshare (count) usat_name = mshare usat_total = one, by(year)

	* Combine the three sources by year; years found in only one source are kept.
	merge m:1 year using `nyt', nogenerate
	merge m:1 year using `pw', nogenerate

save data\bestsellers.dta, replace